##############################################################
##############################################################
### Replication Materials for
### Stefan Müller and Liam Kneafsey:
### Evidence for the Irrelevance of Irrelevant Events
### Political Science Research and Methods
###
### Please get in touch with the authors if you have any questions: 
### stefan.mueller@ucd.ie
### Note: 0000_readme.pdf contains 
### instructions and details about each script and dataset
##############################################################
##############################################################


## load packages
library(dplyr)   # CRAN v1.0.5
library(tidyr)   # CRAN v1.1.3
library(MuMIn)   # CRAN v1.43.17
library(stringr) # CRAN v1.4.0
library(cowplot) # CRAN v1.1.1
library(car)     # CRAN v3.0-10
library(ggplot2) # CRAN v3.3.3
library(here)    # CRAN v1.0.1

here::here()
## alternatively, set working directory manually
## setwd("...")

source("function_theme_base.R")

## parts of this code are adjusted from
## https://dcosme.github.io/2019/02/26/specification-curve-example/

## load dataset with elections
dat_local_raw <- readRDS("data_elections_local_full.rds") 


## only keep: incumbents whose county either lost or won a game
## = all "treated" incumbents
## the coefficienf for winner_loser_before_num will show 
## whether an incumbent increases her vote share when the county team
## won shortly before the election (winner_loser_before_num = 1)

dat_local_incumbents_treated <- dat_local_raw %>% 
    filter(incumbent == "Incumbent (Candidate elected in t-1)") %>% 
    filter(winner_loser_before != "Untreated") %>% 
    mutate(decade = factor(paste0(substr(election_id, 1, 3), "0"))) %>% 
    mutate(winner_loser_before = as.character(winner_loser_before)) %>% 
    mutate(winner_loser_before_num = ifelse(winner_loser_before == "Win", 1, 0))


#                # create a dummy for strongholds
dat_local_incumbents_treated <- dat_local_incumbents_treated %>% 
    mutate(stronghold_dummy = ifelse(str_detect(stronghold_win_loss, "Stronghold"), 
                                     "Stronghold", "Non-stronghold"))


## set na.action for dredge
options(na.action = "na.fail")


## select only the variables included in the regression models 
## and omit missing values to have the same N across all models

dat_local_incumbents_treated_no_na <- dat_local_incumbents_treated %>% 
    ungroup() %>% 
    select(diff_share, county_gaa, 
           winner_loser_before_num, turnout, 
           election_id, sport, margin,
           party_recoded, stronghold_dummy) %>% 
    na.omit() 


summary(dat_local_incumbents_treated_no_na$winner_loser_before_num)

## number of complete observations in this subset
nrow(dat_local_incumbents_treated_no_na)

## run full model
full_model_local <- lm(diff_share ~ 
                           winner_loser_before_num +
                           county_gaa +
                           turnout +
                           margin +
                           election_id +
                           sport +
                           party_recoded +
                           stronghold_dummy,
                       data = dat_local_incumbents_treated_no_na)


## run all possible nested models but only keep thos models that include
## the treatment variable (winner_loser_before)

all_models_local <-  dredge(full_model_local) 

nrow(all_models_local)

## filter only models that include the main independent variable of interest (winner/loser)
all_models_local <- filter(all_models_local, !is.na(winner_loser_before_num))

nrow(all_models_local)

## get coefficients and standard errors for all estimates in each model
all_models_local_coefs <- coefTable(all_models_local)

length(all_models_local_coefs)

## transform list of coefficients and standard errors to a data frame
df_all_models_local_coefs <- do.call(rbind.data.frame, all_models_local_coefs)

## get coefficient name from rownames
df_all_models_local_coefs$coefficient <- rownames(df_all_models_local_coefs)


## filter only those coefficients relating to winner_loser_before_num
## because these values are needed to extract the standard errors of the coefficients
df_coefs_se_winner_loser_before <- filter(df_all_models_local_coefs, 
                                          str_detect(coefficient, "winner_loser")) 

#                # create variable for merging below
df_coefs_se_winner_loser_before <- df_coefs_se_winner_loser_before %>% 
    mutate(winner_loser_before_num = Estimate) 

## merge standard errors for the independent variable of interest
## by using the point estimates of winner_loser_before_num
## as the merging variable (not elegant, but not possible to do it differently)

## check that estimate colums are unique 
unique_valid_models <- nrow(filter(all_models_local, !is.na(winner_loser_before_num)))
unique_valid_models

unique_coefs <- length(unique(all_models_local$winner_loser_before_num))
unique_coefs


unique_coefs_se <- length(unique(df_coefs_se_winner_loser_before$winner_loser_before_num))
unique_coefs_se



## this code works only if the number of unique 
## coefficients is the same in both dataframes!
## Be very careful if you adjust this code for your own work.

## stop the code if the numbers differ
stopifnot(unique_coefs_se == unique_coefs)


all_models_winner_loser_before <- left_join(all_models_local,
                                            df_coefs_se_winner_loser_before,
                                            by = c("winner_loser_before_num"))


nrow(all_models_winner_loser_before)
nrow(all_models_local)

## get variables included in the model
variable.names <- names(dat_local_incumbents_treated_no_na)[-1]


## prepare data for plot
plot_data_local <-  all_models_winner_loser_before %>%
    filter(!is.na(Estimate)) %>% ## remove models without coefficient for winner_loser_before_num
    arrange(Estimate) %>% ## arrange estimates in ascending order
    mutate(specification = row_number()) 

nrow(plot_data_local) ## number of models



## check how many coefficients are positive/negative and how many are
## statistically significant

plot_data_local_check <- plot_data_local %>% 
    mutate(significant_95 = (Estimate - 1.96 * `Std. Error`) > 0 |
               (Estimate + 1.96 * `Std. Error`) < 0) %>% 
    mutate(direction = ifelse(Estimate > 0, "Positive",
                              ifelse(Estimate < 0, "Negative", "0")))


table(plot_data_local_check$direction,
      plot_data_local_check$significant_95)




plot_top_local <- plot_data_local %>%
    ggplot(aes(x = specification, 
               y = Estimate)) + 
    geom_linerange(aes(ymin = Estimate - 1.96 * `Std. Error`,
                       ymax = Estimate + 1.96 * `Std. Error`),
                   colour = "grey50") +
    geom_point(size = 1) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "red") + ## dashed line for null model AIC
    labs(x = "", y = "Coefficient estimate for Winner")
plot_top_local

## note: warning will appear because not all models contain all variables 
## (therefore, the attributes are not identical and will be dropped)

plot_data_local_long <-  plot_data_local %>%
    gather(variable, value, eval(variable.names)) %>% 
    mutate(value = ifelse(!is.na(value), "|", "")) ## plot a line if included in the model


## remove winner_loser_before_num because this variable is 
## included in all models and this coefficient is displayed in the graph

plot_data_local_long <- plot_data_local_long %>% 
    filter(variable != "winner_loser_before_num")


table(plot_data_local_long$variable)

recode_vars <- c("'county_gaa'='County team';
                 'difference'='Days after match';
                 'election_id'='Election';
                 'margin'='Margin of result';
                 'party_recoded'='Party';
                 'sport'='Sport';
                 'stronghold_dummy'='Stronghold';
                 'turnout'='Turnout'")



plot_data_local_long <- plot_data_local_long %>% 
    mutate(variable = car::recode(variable, recode_vars))


plot_bottom_local <-   ggplot(plot_data_local_long,
                              aes(specification, variable)) +
    geom_text(aes(label = value), alpha = 1) +
    labs(x = "Specification number", y = "Variables")

plot_bottom_local

## combine both plots
plot_grid(plot_top_local, plot_bottom_local, ncol = 1, align = "v",
          rel_heights = c(0.6, 0.4))
ggsave("fig_a16.pdf",
       width = 9, height = 5.5)

## script executed successfully on
date()

sessionInfo()
